<!doctype html>
<!-- Standards-mode doctype (the Bootstrap 3 stylesheets loaded below expect it); page text is Simplified Chinese. -->
<html lang="zh-Hans">
 <head>
  <meta charset="utf-8"/>
  <!-- Viewport: fit the device width at 1:1 initial scale. Zoom is intentionally left enabled so readers can enlarge the text. -->
  <meta content="width=device-width, initial-scale=1" name="viewport"/>
  <title>
   Spark 1.5.0发布：新增机器学习算法工具，扩展Spark R API  | 数螺 | NAUT IDEA
  </title>
  <link href="http://cdn.bootcss.com/bootstrap/3.3.6/css/bootstrap-theme.min.css" rel="stylesheet"/>
  <link href="http://cdn.bootcss.com/bootstrap/3.3.6/css/bootstrap.min.css" rel="stylesheet"/>
  <style type="text/css">
   /* ---- Article container (#xmain) ---- */

   /* Images: capped at the container width, each on its own line with vertical spacing. */
   #xmain img {
     display: block;
     max-width: 100%;
     margin-top: 10px;
     margin-bottom: 10px;
   }

   /* Body paragraphs. */
   #xmain p {
     font-size: 16px;
     line-height: 150%;
     margin-top: 20px;
   }

   /* Heading scale inside the article: h2 > h3 > h4. */
   #xmain h2 { font-size: 24px; }
   #xmain h3 { font-size: 20px; }
   #xmain h4 { font-size: 18px; }

   /* ---- Top banner (.header) ---- */

   /* Blue bar with white text, separated from the content below it. */
   .header {
     color: #ffffff;
     background-color: #0099ff;
     margin-bottom: 20px;
   }

   /* Banner text is laid out inline so it can sit next to the logo. */
   .header p {
     display: inline-block;
     vertical-align: middle;
     margin: 0px;
     padding: 10px 0;
     font-size: 16px;
   }

   /* Links in the banner stay white on the blue background. */
   .header a {
     color: white;
   }

   /* Logo height in the banner. */
   .header img {
     height: 25px;
   }
  </style>
  <script src="http://cdn.bootcss.com/jquery/3.0.0/jquery.min.js">
  </script>
  <script src="http://nautstatic-10007657.file.myqcloud.com/static/css/readability.min.js" type="text/javascript">
  </script>
  <script type="text/javascript">
   // When the DOM is ready, run Readability over a clone of the page and put
   // the extracted article HTML into #xmain. If extraction is not possible,
   // the markup already present in #xmain is left as it is.
   $(document).ready(function() {
     // Location descriptor of the source article, handed to Readability.
     var uri = {
       spec: "http://dataunion.org/20751.html",
       host: "http://dataunion.org",
       prePath: "http://dataunion.org",
       scheme: "http",
       pathBase: "http://dataunion.org/"
     };

     // Readability comes from an external script; if it did not load,
     // bail out instead of throwing a ReferenceError.
     if (typeof Readability !== "function") {
       return;
     }

     var target = document.getElementById("xmain");
     if (!target) {
       return;
     }

     // Parse a clone so the live document is not touched during extraction.
     var documentClone = document.cloneNode(true);
     var article = new Readability(uri, documentClone).parse();

     // parse() may yield no result; only overwrite the container when
     // there is extracted content to show.
     if (article && article.content) {
       target.innerHTML = article.content;
     }
   });
  </script>
  <!-- 1466454823: Accept with keywords: (title(0.4):数盟,Spark,社区,算法,工具, topn(0.333333333333):社区,联系,数盟,特性,深度学习,行业资讯,数据挖掘,有所,工具,图书,原作者,数据分析,Python,职位,职业规划,基础架构,文章,可视化,Spark,性能,算法,小象,spark,合作伙伴,编程语言,人工智能,课程,评论,版本,行业).-->
 </head>
 <body onload="">
  <div class="header">
   <div class="container">
    <div class="row">
     <div class="col-xs-6 col-sm-6 text-left">
      <a href="/databee">
       <img src="http://nautidea-10007657.cos.myqcloud.com/logo_white.png"/>
      </a>
      <a href="/databee">
       <p>
        数螺
       </p>
      </a>
     </div>
     <div class="hidden-xs col-sm-6 text-right">
      <p>
       致力于数据科学的推广和知识传播
      </p>
     </div>
    </div>
   </div>
  </div>
  <div class="container text-center">
   <h1>
    Spark 1.5.0发布：新增机器学习算法工具，扩展Spark R API
   </h1>
  </div>
  <div class="container" id="xmain">
   ﻿﻿
   <title>
    Spark 1.5.0发布：新增机器学习算法工具，扩展Spark R API | 数盟社区
   </title>
   <!-- All in One SEO Pack 2.2.7.6.2 by Michael Torbert of Semper Fi Web Design[32,85] -->
   <!-- /all in one seo pack -->
   <!--
<div align="center">
<a href="http://strata.oreilly.com.cn/hadoop-big-data-cn?cmp=mp-data-confreg-home-stcn16_dataunion_pc" target="_blank"><img src="http://dataunion.org/wp-content/uploads/2016/05/stratabj.jpg"/ ></a>
</div>
-->
   <header id="header-web">
    <div class="header-main">
     <hgroup class="logo">
      <h1>
       <a href="http://dataunion.org/" rel="home" title="数盟社区">
        <img src="http://dataunion.org/wp-content/themes/yzipi/images/logo.png"/>
       </a>
      </h1>
     </hgroup>
     <!--logo-->
     <nav class="header-nav">
      <ul class="menu" id="menu-%e4%b8%bb%e8%8f%9c%e5%8d%95">
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-has-children menu-item-71" id="menu-item-71">
        <a href="http://dataunion.org/category/events" title="events">
         活动
        </a>
        <ul class="sub-menu">
         <li class="menu-item menu-item-type-post_type menu-item-object-page menu-item-22457" id="menu-item-22457">
          <a href="http://dataunion.org/2016timeline">
           2016档期
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-22459" id="menu-item-22459">
          <a href="http://dataunion.org/category/parterc">
           合作会议
          </a>
         </li>
        </ul>
       </li>
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category current-post-ancestor menu-item-has-children menu-item-20869" id="menu-item-20869">
        <a href="http://dataunion.org/category/tech" title="articles">
         文章
        </a>
        <ul class="sub-menu">
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category current-post-ancestor current-menu-parent current-post-parent menu-item-20867" id="menu-item-20867">
          <a href="http://dataunion.org/category/tech/base" title="base">
           基础架构
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-3302" id="menu-item-3302">
          <a href="http://dataunion.org/category/tech/ai" title="ai">
           人工智能
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-3303" id="menu-item-3303">
          <a href="http://dataunion.org/category/tech/analysis" title="analysis">
           数据分析
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-21920" id="menu-item-21920">
          <a href="http://dataunion.org/category/tech/dm">
           数据挖掘
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-3314" id="menu-item-3314">
          <a href="http://dataunion.org/category/tech/viz" title="viz">
           可视化
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-3305" id="menu-item-3305">
          <a href="http://dataunion.org/category/tech/devl" title="devl">
           编程语言
          </a>
         </li>
        </ul>
       </li>
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-has-children menu-item-20876" id="menu-item-20876">
        <a href="http://dataunion.org/category/industry">
         行业
        </a>
        <ul class="sub-menu">
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-16328" id="menu-item-16328">
          <a href="http://dataunion.org/category/industry/case" title="case">
           行业应用
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-2112" id="menu-item-2112">
          <a href="http://dataunion.org/category/industry/demo" title="demo">
           Demo展示
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-21562" id="menu-item-21562">
          <a href="http://dataunion.org/category/industry/news">
           行业资讯
          </a>
         </li>
        </ul>
       </li>
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-311" id="menu-item-311">
        <a href="http://dataunion.org/category/sources" title="sources">
         资源
        </a>
       </li>
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-20870" id="menu-item-20870">
        <a href="http://dataunion.org/category/books" title="book">
         图书
        </a>
       </li>
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-21363" id="menu-item-21363">
        <a href="http://dataunion.org/category/training">
         课程
        </a>
       </li>
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-has-children menu-item-21853" id="menu-item-21853">
        <a href="http://dataunion.org/category/jobs">
         职位
        </a>
        <ul class="sub-menu">
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-22050" id="menu-item-22050">
          <a href="http://dataunion.org/category/career">
           职业规划
          </a>
         </li>
        </ul>
       </li>
      </ul>
     </nav>
     <!--header-nav-->
    </div>
   </header>
   <!--header-web-->
   <div id="main">
    <div id="soutab">
     <form action="http://dataunion.org/" class="search" method="get">
     </form>
    </div>
    <div id="container">
     <nav id="mbx">
      当前位置：
      <a href="http://dataunion.org">
       首页
      </a>
      &gt;
      <a href="http://dataunion.org/category/tech">
       文章
      </a>
      &gt;
      <a href="http://dataunion.org/category/tech/base">
       基础架构
      </a>
      &gt;  正文
     </nav>
     <!--mbx-->
     <article class="content">
      <header align="centre" class="contenttitle">
       <div class="mscc">
        <h1 class="mscctitle">
         <a href="http://dataunion.org/20751.html">
          Spark 1.5.0发布：新增机器学习算法工具，扩展Spark R API
         </a>
        </h1>
        <address class="msccaddress ">
         <em>
          2,628 次阅读 -
         </em>
         <a href="http://dataunion.org/category/tech/base" rel="category tag">
          基础架构
         </a>
        </address>
       </div>
      </header>
      <div class="content-text">
       <p>
        出处：
        <a href="http://www.iteblog.com/archives/1506">
         过往记忆
        </a>
       </p>
       <p>
        <span class="wp_keywordlink_affiliate">
         <a data-original-title="View all posts in Spark" href="http://www.iteblog.com/archives/tag/spark" target="_blank" title="">
          Spark
         </a>
        </span>
        1.5.0是1.x线上的第6个发行版。这个版本共处理了来自230+contributors和80+机构的1400+个patches。
        <span class="wp_keywordlink_affiliate">
         <a data-original-title="View all posts in Spark" href="http://www.iteblog.com/archives/tag/spark" target="_blank" title="">
          Spark
         </a>
        </span>
        1.5的许多改变都是围绕在提升
        <span class="wp_keywordlink_affiliate">
         <a data-original-title="View all posts in Spark" href="http://www.iteblog.com/archives/tag/spark" target="_blank" title="">
          Spark
         </a>
        </span>
        的性能、可用性以及操作稳定性。Spark 1.5.0焦点在Tungsten项目，它主要是通过对低层次的组件进行优化从而提升Spark的性能。Spark 1.5版本为Streaming增加了operational特性，比如支持backpressure。另外比较重要的更新就是新增加了一些机器学习算法和工具，并扩展了Spark R的相关API。主要做了以下修改：
       </p>
       <h2>
        APIs: RDD, DataFrame和SQL
       </h2>
       <p>
        1、Consistent resolution of column names (see Behavior Changes section)
        <br/>
        2、SPARK-3947: New experimental user-defined aggregate function (UDAF) interface
        <br/>
        3、SPARK-8300: DataFrame hint for broadcast joins
        <br/>
        4、SPARK-8668: expr function for turning a SQL expression into a DataFrame column
        <br/>
        5、SPARK-9076: Improved support for NaN values
        <br/>
        5.1、NaN functions: isnan, nanvl
        <br/>
        5.2、dropna/fillna also fill/drop NaN values in addition to NULL values
        <br/>
        5.3、Equality test on NaN = NaN returns true
        <br/>
        5.4、NaN is greater than all other values
        <br/>
        5.5、In aggregation, NaN values go into one group
        <br/>
        6、SPARK-8828: Sum function returns null when all input values are nulls
        <br/>
        7、Data types
        <br/>
        7.1、SPARK-8943: CalendarIntervalType for time intervals
        <br/>
        7.2、SPARK-7937: Support ordering on StructType
        <br/>
        7.3、SPARK-8866: TimestampType’s precision is reduced to 1 microsecond (1us)
        <br/>
        8、SPARK-8159: Added ~100 functions, including date/time, string, math.
        <br/>
        9、SPARK-8947: Improved type coercion and error reporting in plan analysis phase (i.e. most errors should be reported in analysis time, rather than execution time)
        <br/>
        10、SPARK-1855: Memory and local disk only checkpointing support
       </p>
       <h2>
        Backend Execution: DataFrame and SQL
       </h2>
       <p>
        1、对大多数的DataFrame/SQL函数来说，Code generation默认情况下是开启的。
        <br/>
        2、DataFrame/SQL中的aggregation execution有所提升。
        <br/>
        2.1、Cache friendly in-memory hash map layout
        <br/>
        2.2、Fallback to external-sort-based aggregation when memory is exhausted
        <br/>
        2.3、aggregations操作中默认开启Code generation
        <br/>
        3、对 DataFrame/SQL中的Join执行有所提升
        <br/>
        3.1、Prefer (external) sort-merge join over hash join in shuffle joins (for left/right outer and inner joins), i.e. join data size is now bounded by disk rather than memory
        <br/>
        3.2、Support using (external) sort-merge join method for left/right outer joins
        <br/>
        3.3、Support for broadcast outer join
        <br/>
        4、对DataFrame/SQL中的排序引擎有所提升
        <br/>
        4.1、Cache-friendly in-memory layout for sorting
        <br/>
        4.2、Fallback to external sorting when data exceeds memory size
        <br/>
        4.3、Code generated comparator for fast comparisons
        <br/>
        5、Native memory management &amp; representation
        <br/>
        5.1、Compact binary in-memory data representation, leading to lower memory usage
        <br/>
        5.2、Execution memory is explicitly accounted for, without relying on JVM GC, leading to less GC and more robust memory management
        <br/>
        6、SPARK-8638: Improved performance &amp; memory usage in window functions
        <br/>
        7、Metrics instrumentation, reporting, and visualization
        <br/>
        7.1、SPARK-8856: Plan visualization for DataFrame/SQL
        <br/>
        7.2、SPARK-8735: Expose metrics for runtime memory usage in web UI
        <br/>
        7.3、SPARK-4598: Pagination for jobs with large number of tasks in web UI
       </p>
       <h2>
        Integrations: Data Sources, Hive, Hadoop, Mesos and Cluster Management
       </h2>
       <p>
        1、Mesos
        <br/>
        1.1、SPARK-6284: Support framework authentication and Mesos roles
        <br/>
        1.2、SPARK-6287: Dynamic allocation in Mesos coarse-grained mode
        <br/>
        1.3、SPARK-6707: User specified constraints on Mesos slave attributes
        <br/>
        2、YARN
        <br/>
        2.1、SPARK-4352: Dynamic allocation in YARN works with preferred locations
        <br/>
        3、Standalone Cluster Manager
        <br/>
        3.1、SPARK-4751: Dynamic resource allocation support
        <br/>
        4、SPARK-6906: Improved Hive and metastore support
        <br/>
        4.1、SPARK-8131: Improved Hive database support
        <br/>
        4.2、Upgraded Hive dependency to Hive 1.2
        <br/>
        4.3、Support connecting to Hive 0.13, 0.14, 1.0/0.14.1, 1.1, 1.2 metastore
        <br/>
        4.4、Support partition pruning pushdown into the metastore (off by default; config flag spark.sql.hive.metastorePartitionPruning)
        <br/>
        4.5、Support persisting data in Hive compatible format in metastore
        <br/>
        5、SPARK-9381: Support data partitioning for JSON data sources
        <br/>
        6、SPARK-5463: Parquet improvements
        <br/>
        6.1、将Parquet升级到 1.7
        <br/>
        6.2、Speedup metadata discovery and schema merging
        <br/>
        6.3、Predicate pushdown on by default
        <br/>
        6.4、SPARK-6774: Support for reading non-standard legacy Parquet files generated by various libraries/systems by fully implementing all backwards-compatibility rules defined in parquet-format spec
        <br/>
        6.5、SPARK-4176: Support for writing decimal values with precision greater than 18
        <br/>
        7、ORC improvements (various bug fixes)
        <br/>
        8、SPARK-8890: Faster and more robust dynamic partition insert
        <br/>
        9、SPARK-9486: DataSourceRegister interface for external data sources to specify short names
       </p>
       <h2>
        R语言
       </h2>
       <p>
        1、SPARK-6797: Support for YARN cluster mode in R
        <br/>
        2、SPARK-6805: GLMs with R formula, binomial/Gaussian families, and elastic-net regularization
        <br/>
        3、SPARK-8742: Improved error messages for R
        <br/>
        4、SPARK-9315: Aliases to make DataFrame functions more R-like
       </p>
       <h2>
        Machine Learning and Advanced Analytics
       </h2>
       <p>
        1、SPARK-8521: New Feature transformers: CountVectorizer, Discrete Cosine transformation, MinMaxScaler, NGram, PCA, RFormula, StopWordsRemover, and VectorSlicer.
        <br/>
        2、New Estimators in Pipeline API: SPARK-8600 naive Bayes, SPARK-7879 k-means, and SPARK-8671 isotonic regression.
        <br/>
        3、New Algorithms: SPARK-9471 multilayer perceptron classifier, SPARK-6487 PrefixSpan for sequential pattern mining, SPARK-8559 association rule generation, SPARK-8598 1-sample Kolmogorov-Smirnov test, etc.
        <br/>
        4、提升现有的算法
        <br/>
        4.1、LDA: online LDA performance, asymmetric doc concentration, perplexity, log-likelihood, top topics/documents, save/load, etc.
        <br/>
        4.2、Trees and ensembles: class probabilities, feature importance for random forests, thresholds for classification, checkpointing for GBTs, etc.
        <br/>
        4.3、Pregel-API: more efficient Pregel API implementation for GraphX.
        <br/>
        4.4、GMM: distribute matrix inversions.
        <br/>
        5、Model summary for linear and logistic regression.
        <br/>
        6、Python API: distributed matrices, streaming k-means and linear models, LDA, power iteration clustering, etc.
        <br/>
        7、Tuning and evaluation: train-validation split and multiclass classification evaluator.
        <br/>
        8、Documentation: document the release version of public API methods
       </p>
       <h2>
        Spark Streaming
       </h2>
       <p>
        1、SPARK-7398: Backpressure: Automatic and dynamic rate controlling in Spark Streaming for handling bursty input streams. This allows a streaming pipeline to dynamically adapt to changes in ingestion rates and computation loads. This works with receivers, as well as, the Direct Kafka approach.
        <br/>
        2、Python API for streaming sources
        <br/>
        2.1、SPARK-8389: Kafka offsets of Direct Kafka streams available through Python API
        <br/>
        2.2、SPARK-8564: Kinesis Python API
        <br/>
        2.3、SPARK-8378: Flume Python API
        <br/>
        2.4、SPARK-5155: MQTT Python API
        <br/>
        3、SPARK-3258: Python API for streaming machine learning algorithms: K-Means, linear regression, and logistic regression
        <br/>
        4、SPARK-9215: Improved reliability of Kinesis streams : No need for enabling write ahead logs for saving and recovering received data across driver failures
        <br/>
        5、Direct Kafka API graduated: Not experimental any more.
        <br/>
        6、SPARK-8701: Input metadata in UI: Kafka offsets, and input files are visible in the batch details UI
        <br/>
        7、SPARK-8882: Better load balancing and scheduling of receivers across cluster
        <br/>
        8、SPARK-4072: Include streaming storage in web UI
       </p>
       <h2>
        弃用、移除、Configs和行为变化
       </h2>
       <h3>
        Spark Core
       </h3>
       <p>
        1、DAGScheduler中的local task execution模块被移除
        <br/>
        2、Driver和executor的默认内存从512m升到1G
        <br/>
        3、JVM中MaxPermSize的默认配置从128m升到256m
        <br/>
        4、spark-shell的默认日志级别从INFO提升到WARN
        <br/>
        5、基于NIO的ConnectionManager已经弃用，并且将在1.6版本中移除。
       </p>
       <h3>
        Spark SQL &amp; DataFrames
       </h3>
       <p>
        1、Optimized execution using manually managed memory (Tungsten) is now enabled by default, along with code generation for expression evaluation. These features can both be disabled by setting spark.sql.tungsten.enabled to false.
        <br/>
        2、Parquet schema merging is no longer enabled by default. It can be re-enabled by setting spark.sql.parquet.mergeSchema to true.
        <br/>
        3、Resolution of strings to columns in Python now supports using dots (.) to qualify the column or access nested values. For example <code>df['table.column.nestedField']</code>. However, this means that if your column name contains any dots you must now escape them using backticks (e.g., <code>table.`column.with.dots`.nested</code>).
        <br/>
        4、In-memory columnar storage partition pruning is on by default. It can be disabled by setting spark.sql.inMemoryColumnarStorage.partitionPruning to false.
        <br/>
        5、Unlimited precision decimal columns are no longer supported, instead Spark SQL enforces a maximum precision of 38. When inferring schema from BigDecimal objects, a precision of (38, 18) is now used. When no precision is specified in DDL then the default remains Decimal(10, 0).
        <br/>
        6、Timestamps are now processed at a precision of 1us, rather than 100ns.
        <br/>
        7、Sum function returns null when all input values are nulls (null before 1.4, 0 in 1.4).
        <br/>
        8、In the sql dialect, floating point numbers are now parsed as decimal. HiveQL parsing remains unchanged.
        <br/>
        9、The canonical names of SQL/DataFrame functions are now lower case (e.g. sum vs SUM).
        <br/>
        10、It has been determined that using the DirectOutputCommitter when speculation is enabled is unsafe and thus this output committer will not be used by parquet when speculation is on, independent of configuration.
        <br/>
        11、JSON data source will not automatically load new files that are created by other applications (i.e. files that are not inserted to the dataset through Spark SQL). For a JSON persistent table (i.e. the metadata of the table is stored in Hive Metastore), users can use REFRESH TABLE SQL command or HiveContext’s refreshTable method to include those new files to the table. For a DataFrame representing a JSON dataset, users need to recreate the DataFrame and the new DataFrame will include new files.
       </p>
       <h3>
        Spark Streaming
       </h3>
       <p>
        1、新的实验性backpressure特性可以通过将spark.streaming.backpressure.enabled设置为true来开启。
        <br/>
        2、Write Ahead Log does not need to be enabled for Kinesis streams. The updated Kinesis receiver keeps track of Kinesis sequence numbers received in each batch, and uses that information to re-read the necessary data while recovering from failures.
        <br/>
        3、The number of times the receivers are relaunched on failure is not limited by the max Spark task attempts. The system will always try to relaunch receivers after failures until the StreamingContext is stopped.
        <br/>
        4、Improved load balancing of receivers across the executors, even after relaunching.
        <br/>
        5、Enabling checkpointing when using queueStream throws exception as queueStream cannot be checkpointed. However, we found this to break certain existing apps. So this change will be reverted in Spark 1.5.1.
       </p>
       <h3>
        MLlib
       </h3>
       <p>
        1、In the spark.mllib package, there are no breaking API changes but some behavior changes:
        <br/>
        1.1、SPARK-9005: RegressionMetrics.explainedVariance returns the average regression sum of squares.
        <br/>
        1.2、SPARK-8600: NaiveBayesModel.labels become sorted.
        <br/>
        1.3、SPARK-3382: GradientDescent has a default convergence tolerance 1e-3, and hence iterations might end earlier than 1.4.
        <br/>
        2、In the experimental spark.ml package, there exists one breaking API change and one behavior change:
       </p>
       <p>
        2.1、SPARK-9268: Java’s varargs support is removed from Params.setDefault due to a Scala compiler bug.
        <br/>
        2.2、SPARK-10097: Evaluator.isLargerBetter is added to indicate metric ordering. Metrics like RMSE no longer flip signs as in 1.4.
       </p>
      </div>
      <div>
       <strong>
        注：转载文章均来自于公开网络，仅供学习使用，不会用于任何商业用途，如果侵犯到原作者的权益，请您与我们联系删除或者授权事宜，联系邮箱：contact@dataunion.org。转载数盟网站文章请注明原文章作者，否则产生的任何版权纠纷与数盟无关。
       </strong>
      </div>
      <!--content_text-->
      <div class="fenxian">
       <!-- JiaThis Button BEGIN -->
       <div class="jiathis_style_32x32">
        <p class="jiathis_button_weixin">
        </p>
        <p class="jiathis_button_tsina">
        </p>
        <p class="jiathis_button_qzone">
        </p>
        <p class="jiathis_button_cqq">
        </p>
        <p class="jiathis_button_tumblr">
        </p>
        <a class="jiathis jiathis_txt jtico jtico_jiathis" href="http://www.jiathis.com/share" target="_blank">
        </a>
        <p class="jiathis_counter_style">
        </p>
       </div>
       <!-- JiaThis Button END -->
      </div>
     </article>
     <!--content-->
     <!--相关文章-->
     <div class="xianguan">
      <div class="xianguantitle">
       相关文章！
      </div>
      <ul class="pic">
       <li>
        <a href="http://dataunion.org/20824.html">
         <img src="http://dataunion.org/wp-content/uploads/2015/09/t018630756a7e263b33-300x165.jpg"/>
        </a>
        <a class="link" href="http://dataunion.org/20824.html" rel="bookmark" title="如何判断一笔交易是否属于欺诈？你只是需要一点数据挖掘">
         如何判断一笔交易是否属于欺诈？你只是需要一点数据挖掘
        </a>
       </li>
       <li>
        <a href="http://dataunion.org/20820.html">
         <img src="http://dataunion.org/wp-content/uploads/2015/09/1-300x200.jpg"/>
        </a>
        <a class="link" href="http://dataunion.org/20820.html" rel="bookmark" title="人们对Python在企业级开发中的10大误解">
         人们对Python在企业级开发中的10大误解
        </a>
       </li>
       <li>
        <a href="http://dataunion.org/20811.html">
         <img src="http://dataunion.org/wp-content/uploads/2015/09/t0133fcacae8523307b_副本-300x200.jpg"/>
        </a>
        <a class="link" href="http://dataunion.org/20811.html" rel="bookmark" title="大神亲传：26条深度学习的金科玉律！">
         大神亲传：26条深度学习的金科玉律！
        </a>
       </li>
       <li>
        <a href="http://dataunion.org/20808.html">
         <img src="http://dataunion.org/wp-content/uploads/2015/09/640.webp-11-300x137.jpg"/>
        </a>
        <a class="link" href="http://dataunion.org/20808.html" rel="bookmark" title="我们是如何在一张地图上表现86万个数据的">
         我们是如何在一张地图上表现86万个数据的
        </a>
       </li>
      </ul>
     </div>
     <!--相关文章-->
     <div class="comment" id="comments">
      <!-- You can start editing here. -->
      <!-- If comments are open, but there are no comments. -->
      <div class="title">
       期待你一针见血的评论，Come on！
      </div>
      <div id="respond">
       <p>
        不用想啦，马上
        <a href="http://dataunion.org/wp-login.php?redirect_to=http%3A%2F%2Fdataunion.org%2F20751.html">
         "登录"
        </a>
        发表自己的想法.
       </p>
      </div>
     </div>
     <!-- .nav-single -->
    </div>
    <!--Container End-->
    <aside id="sitebar">
     <div class="sitebar_list2">
      <div class="wptag">
       <span class="tagtitle">
        热门标签+
       </span>
       <div class="tagg">
        <ul class="menu" id="menu-%e5%8f%8b%e6%83%85%e9%93%be%e6%8e%a5">
         <li class="menu-item menu-item-type-custom menu-item-object-custom menu-item-1605" id="menu-item-1605">
          <a href="http://taidizh.com/">
           泰迪智慧
          </a>
         </li>
         <li class="menu-item menu-item-type-custom menu-item-object-custom menu-item-20884" id="menu-item-20884">
          <a href="http://www.transwarp.cn/">
           星环科技
          </a>
         </li>
         <li class="menu-item menu-item-type-custom menu-item-object-custom menu-item-3538" id="menu-item-3538">
          <a href="http://datall.org/">
           珈和遥感
          </a>
         </li>
         <li class="menu-item menu-item-type-custom menu-item-object-custom menu-item-20888" id="menu-item-20888">
          <a href="http://www.chinahadoop.cn/">
           小象学院
          </a>
         </li>
        </ul>
       </div>
      </div>
     </div>
     <div class="sitebar_list">
      <div class="textwidget">
       <div align="center">
        <a href="http://study.163.com/course/courseMain.htm?courseId=991022" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2016/03/dv.jpg"/>
        </a>
       </div>
      </div>
     </div>
     <div class="sitebar_list">
      <h4 class="sitebar_title">
       文章分类
      </h4>
      <div class="tagcloud">
       <a class="tag-link-44" href="http://dataunion.org/category/industry/demo" style="font-size: 10.204724409449pt;" title="4个话题">
        Demo展示
       </a>
       <a class="tag-link-31" href="http://dataunion.org/category/experts" style="font-size: 15.826771653543pt;" title="52个话题">
        专家团队
       </a>
       <a class="tag-link-870" href="http://dataunion.org/category/tech/ai" style="font-size: 19.795275590551pt;" title="273个话题">
        人工智能
       </a>
       <a class="tag-link-488" href="http://dataunion.org/category/%e5%8a%a0%e5%85%a5%e6%95%b0%e7%9b%9f" style="font-size: 8pt;" title="1个话题">
        加入数盟
       </a>
       <a class="tag-link-869" href="http://dataunion.org/category/tech/viz" style="font-size: 17.204724409449pt;" title="93个话题">
        可视化
       </a>
       <a class="tag-link-30" href="http://dataunion.org/category/partners" style="font-size: 10.645669291339pt;" title="5个话题">
        合作伙伴
       </a>
       <!-- Category cloud links: each anchor targets a category archive on dataunion.org.
            The title attribute carries the topic count ("个话题" = "topics"); among the
            entries shown here, a larger count comes with a larger inline font-size. -->
       <a class="tag-link-889" href="http://dataunion.org/category/parterc" style="font-size: 11.582677165354pt;" title="8个话题">
        合作会议
       </a>
       <a class="tag-link-104" href="http://dataunion.org/category/books" style="font-size: 12.96062992126pt;" title="15个话题">
        图书
       </a>
       <a class="tag-link-220" href="http://dataunion.org/category/tech/base" style="font-size: 19.850393700787pt;" title="281个话题">
        基础架构
       </a>
       <a class="tag-link-219" href="http://dataunion.org/category/tech/analysis" style="font-size: 19.409448818898pt;" title="232个话题">
        数据分析
       </a>
       <a class="tag-link-887" href="http://dataunion.org/category/tech/dm" style="font-size: 13.291338582677pt;" title="17个话题">
        数据挖掘
       </a>
       <a class="tag-link-34" href="http://dataunion.org/category/tech" style="font-size: 20.732283464567pt;" title="404个话题">
        文章
       </a>
       <a class="tag-link-1" href="http://dataunion.org/category/uncategorized" style="font-size: 22pt;" title="693个话题">
        未分类
       </a>
       <a class="tag-link-4" href="http://dataunion.org/category/events" style="font-size: 14.503937007874pt;" title="29个话题">
        活动
       </a>
       <a class="tag-link-890" href="http://dataunion.org/category/tech/%e6%b7%b1%e5%ba%a6%e5%ad%a6%e4%b9%a0" style="font-size: 10.204724409449pt;" title="4个话题">
        深度学习
       </a>
       <a class="tag-link-221" href="http://dataunion.org/category/tech/devl" style="font-size: 18.968503937008pt;" title="193个话题">
        编程语言
       </a>
       <a class="tag-link-888" href="http://dataunion.org/category/career" style="font-size: 15.661417322835pt;" title="48个话题">
        职业规划
       </a>
       <a class="tag-link-5" href="http://dataunion.org/category/jobs" style="font-size: 14.11811023622pt;" title="25个话题">
        职位
       </a>
       <a class="tag-link-871" href="http://dataunion.org/category/industry" style="font-size: 15.716535433071pt;" title="49个话题">
        行业
       </a>
       <a class="tag-link-613" href="http://dataunion.org/category/industry/case" style="font-size: 16.984251968504pt;" title="84个话题">
        行业应用
       </a>
       <a class="tag-link-885" href="http://dataunion.org/category/industry/news" style="font-size: 17.425196850394pt;" title="102个话题">
        行业资讯
       </a>
       <a class="tag-link-10" href="http://dataunion.org/category/training" style="font-size: 14.228346456693pt;" title="26个话题">
        课程
       </a>
       <a class="tag-link-16" href="http://dataunion.org/category/sources" style="font-size: 15.661417322835pt;" title="48个话题">
        资源
       </a>
      </div>
     </div>
     <!-- Sidebar widget headed "功能" ("Functions"): account links (register, login),
          the post and comment RSS feeds, and a link to the Chinese WordPress.org site. -->
     <div class="sitebar_list">
      <h4 class="sitebar_title">
       功能
      </h4>
      <ul>
       <!-- Account entry points, both served by wp-login.php. -->
       <li>
        <a href="http://dataunion.org/wp-login.php?action=register">
         注册
        </a>
       </li>
       <li>
        <a href="http://dataunion.org/wp-login.php">
         登录
        </a>
       </li>
       <!-- Feed links; the abbr title expands "RSS" for readers unfamiliar with the acronym. -->
       <li>
        <a href="http://dataunion.org/feed">
         文章
         <abbr title="Really Simple Syndication">
          RSS
         </abbr>
        </a>
       </li>
       <li>
        <a href="http://dataunion.org/comments/feed">
         评论
         <abbr title="Really Simple Syndication">
          RSS
         </abbr>
        </a>
       </li>
       <!-- Platform credit link. -->
       <li>
        <a href="https://cn.wordpress.org/" title="基于WordPress，一个优美、先进的个人信息发布平台。">
         WordPress.org
        </a>
       </li>
      </ul>
     </div>
    </aside>
    <div class="clear">
    </div>
   </div>
   <!--main-->
   <!-- Site footer: bottom menu and slogan (.right), social links plus the .cd-popup block
        holding the WeChat QR code (.left), then the .bottom bar with the site link, ICP
        record, contact address and the initially hidden #scroll element. -->
   <footer id="dibu">
    <div class="about">
     <div class="right">
      <ul class="menu" id="menu-%e5%ba%95%e9%83%a8%e8%8f%9c%e5%8d%95">
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-18024" id="menu-item-18024">
        <a href="http://dataunion.org/category/partners">
         合作伙伴
        </a>
       </li>
       <li class="menu-item menu-item-type-post_type menu-item-object-page menu-item-20881" id="menu-item-20881">
        <a href="http://dataunion.org/contribute">
         文章投稿
        </a>
       </li>
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-20872" id="menu-item-20872">
        <a href="http://dataunion.org/category/%e5%8a%a0%e5%85%a5%e6%95%b0%e7%9b%9f">
         加入数盟
        </a>
       </li>
       <li class="menu-item menu-item-type-post_type menu-item-object-page menu-item-22441" id="menu-item-22441">
        <a href="http://dataunion.org/f-links">
         友情链接
        </a>
       </li>
       <li class="menu-item menu-item-type-post_type menu-item-object-page menu-item-20874" id="menu-item-20874">
        <a href="http://dataunion.org/aboutus">
         关于数盟
        </a>
       </li>
      </ul>
      <p class="banquan">
       数盟社区        ，
        做最棒的数据科学社区
      </p>
     </div>
     <div class="left">
      <ul class="bottomlist">
       <li>
        <!-- Icon-only link: the img alt is its accessible name. rel="noopener" stops the
             new tab from reaching this page through window.opener. -->
        <a href="http://weibo.com/DataScientistUnion" rel="noopener" target="_blank">
         <img alt="微博" src="http://dataunion.org/wp-content/themes/yzipi/images/weibo.png"/>
        </a>
       </li>
       <li>
        <!-- NOTE(review): presumably a script bound to .cd-popup-trigger shows the popup
             below; the handler is not visible here, confirm before changing this element. -->
        <a class="cd-popup-trigger" href="http://dataunion.org/20751.html#0">
         <img alt="微信公众号" src="http://dataunion.org/wp-content/themes/yzipi/images/weixin.png"/>
        </a>
       </li>
      </ul>
      <div class="cd-popup">
       <div class="cd-popup-container">
        <!-- NOTE(review): an h1 inside the footer popup likely duplicates the page's main
             heading level; kept as-is because theme CSS may target this element. -->
        <h1>
         扫描二维码,加微信公众号
        </h1>
        <img alt="微信公众号二维码" src="http://dataunion.org/wp-content/themes/yzipi/images/2014-12-06-1515289049.png"/>
        <!-- The close link has no text content, so aria-label supplies its accessible name. -->
        <a aria-label="关闭" class="cd-popup-close" href="http://dataunion.org/20751.html">
        </a>
       </div>
       <!-- cd-popup-container -->
      </div>
      <!-- cd-popup -->
     </div>
    </div>
    <!--about-->
    <div class="bottom">
     <a href="http://dataunion.org/">
      数盟社区
     </a>
     <a href="http://www.miitbeian.gov.cn/" rel="external nofollow noopener" target="_blank">
      京ICP备14026740号
     </a>
     联系我们：
     <!-- No target here: a mailto: link hands off to the mail client, and a new-tab
          target can leave an empty tab behind. -->
     <a href="mailto:contact@dataunion.org">
      contact@dataunion.org
     </a>
     <div class="tongji">
     </div>
     <div class="scroll" id="scroll" style="display:none;">
      ︿
     </div>
    </div>
    <!--bottom-->
   </footer>
   <!--dibu-->
  </div>
 </body>
</html>